# Author:    	Annekatrin Deglow, Ralph Sundberg
# Contact:   	annekatrin.deglow@pcr.uu.se, ralph.sundberg@pcr.uu.se
# Script:   	ADRS_analysis.R
# Dataset:   	ADRS_analysis.dta
# Article:   	To Blame or To Support? Large-scale Insurgent Attacks on Civilians and Public Trust in State Institutions
# Journal:    International Studies Quarterly

# ------------------------------------------------------------------------------------------------------
# Set up 
# ------------------------------------------------------------------------------------------------------
rm(list=ls(all=TRUE))
# Set working directory here

# Load packages
library(haven)
library(dplyr)
library(broom)
library(dotwhisker)

# Load data
ADRS_analysis <- read_dta("ADRS_analysis.dta")


#-----------------------------------------------------------------------------------------------------------
# Figure 1: Treatment and control group for the Kabul City sample
#-----------------------------------------------------------------------------------------------------------
# Save date per day in vector
days <- c(17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28)

# Save number of respondents per day in vector 
table(ADRS_analysis$day_int)
n.resp <- c(97,6,8,204,82,100,90, 54,34, 28, 13)

# Combine dates and number of respondents
n.resp.days <- as.data.frame(cbind(n.resp, days))

# Figure 1
plot(n.resp.days$days, n.resp.days$n.resp, 
     col=c("gray70","gray70","gray70","gray70","gray40","gray40","gray40","gray40","gray40","gray40","gray40"),
     lty=1, lwd=20, cex.lab=1.5,
     type = "h",
     pch=21,
     bg="black",
     xaxt="n",
     yaxt="n",
     ylim=c(0,220),
     xlab = "Interview days (June 2012)",
     ylab = "Number of respondents")
axis(side = 1,cex.axis=1.2, at = c(17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, size=8))
axis(side = 2,cex.axis=1.2, at=c(0,20,40,60,80,100,120,140,160,180,200,220))
abline(v=21.5, col="black", lty=2,lwd=2)
text(20.7, 130, expression("Attack on the Spozhmai Hotel"), srt="90")
axis(side=3,at=c(17,21),tcl=0.5,lwd.ticks=1,col.ticks="black", mgp=c(0,0,0.8), col.axis="white", cex.axis=0.2)
mtext(side=3,  text=(expression('N'['control']*' = 318')),line=1, at=(19), cex=1.1)
axis(side=3,at=c(21.1, 28),tcl=0.5,lwd.ticks=1,col.ticks="black",mgp=c(0,0,0.8), col.axis="white", cex.axis=0.2)
mtext(side=3,  text=(expression('N'['treatment']*' = 401')),line=1, at=(24.5), cex=1.1)
axis(side=3,at=c(17, 28),tcl=0.5,lwd.ticks=1,col.ticks="black",mgp=c(0,0,2.6), col.axis="white", cex.axis=0.2)
mtext(side=3, text=(expression('N'['total']*' = 719')),line=2.8, at=(22.5), cex=1.1)

dev.off()


#-----------------------------------------------------------------------------------------------------------
# Figure 2: Distribution of interviews by treatment and control group and sub-districts
#-----------------------------------------------------------------------------------------------------------
# Proportion of interviews in each subdistrict for treatment (treatment_dummy=1) and control (treatment_dummy=0) group
prop.treat  <- as.data.frame(prop.table(table(ADRS_analysis$district[ADRS_analysis$treatment_dummy==1]))*100) 
prop.control <- as.data.frame(prop.table(table(ADRS_analysis$district[ADRS_analysis$treatment_dummy==0]))*100)

# Figure 2 (note that the figure displays numeric labels behind the district codes, and not the district codes itself)
# 1 = Kabul, District No 7
# 2 = Kabul, District No 15 
# 3 = Kabul, District No 10 
# 4 = Kabul, District No 6
# 5 = Kabul, District No 8
# 6 = Kabul, District No 4
# 7 = Kabul, District No 5
# 8 = Kabul, District No 9
# 9 = Kabul, District No 11
# 10 = Kabul, District No 13
# 11 = Kabul, District No 16
# 12 = Kabul, District No 3
# 13 = Kabul, District No 2
# 14 = Kabul, District No 1
# 15 = Kabul, District No 17
# 16 = Kabul, District No 12

plot(prop.control$Freq,  main="", type="h", lwd=20,col="gray70", bty="n",
     xlab="Kabul City's Sub-districts",
     ylab="Proportion of interviews (%)", cex.lab=1.4,
     xaxt="n", ylim=c(0,14))
lines(prop.treat$Freq, type="h", lwd=8, col="gray40")
axis(side=1, seq(0,16, 1),las=2,cex.axis=1.4)
legend(11,12, 
       legend = c("Treatment group", "Control group"), 
       col = c("gray40", "gray70"), 
       lwd=c(8, 20),
       bty = "n", 
       cex = 0.85, 
       text.col = "black", 
       horiz = F , 
       inset = c(0.1, 0.1), text.font=3)

dev.off()

#-----------------------------------------------------------------------------------------------------------
# Figure 3: Balance test
#-----------------------------------------------------------------------------------------------------------
# Save income and education as factor/categorical variables
ADRS_analysis$income <- as.factor(ADRS_analysis$income)
ADRS_analysis$education <- as.factor(ADRS_analysis$education)

# Regress treatment status on pre-treatment covariates
balance_test <- glm(treatment_dummy ~ gender_dummy + age +  pashtun_dummy + radio_dummy + income + education,
                   data=ADRS_analysis, family=binomial(logit))
summary(balance_test)
# Figure 3: coefficient plot
by_var <- list(c("Income", "2,001 - 3,000","More than 40,000"),
               c("Education", "Primary school (incomplete)", "University (or above)"))

{dwplot(balance_test, conf.level = .95,
        whisker_args = aes(colour="black"),
        dot_args = aes(colour = "black"),
        vline = geom_vline(xintercept = 0, colour = "black", linetype = 2)) %>% # plot line at zero _behind_ coefs
    relabel_predictors(c(gender_dummy = "Gender",
                         age = "Age",
                         pashtun_dummy="Ethnicity",
                         radio_dummy="Radio ownership",
                         income2="2,001 - 3,000",
                         income3="3,001 - 5,000",
                         income4="5,001 - 10,000",
                         income5="10,001 - 15,000",
                         income6="15,001 - 20,000",
                         income7="20,001 - 25,000",
                         income8="25,001 - 40,000",
                         income9="More than 40,000",
                         education2="Primary school (incomplete)",
                         education3="Primary school (complete)",
                         education4="Secondary school (incomplete)",
                         education5="Secondary school (complete)",
                         education6="High school",
                         education7="University (or above)")) +  
    theme_classic()+
    xlab("Coeffiecient estimates")} %>%
  add_brackets(by_var)

dev.off()

#------------------------------------------------------------------------------------------------------
# Online Appendix: P-values Corrected for Multiple Hypotheses Testing
#------------------------------------------------------------------------------------------------------
temp_list <- list()

for(i in 1:3){
  pvalues <- c(.001,.033, .043)
  pvalue_method <- c( "bonferroni", "holm", "hochberg")
  adj_pvalue <- p.adjust(pvalues, method=pvalue_method[i])
  temp_list[[i]] <- data.frame(pvalue_method=pvalue_method[i], pvalues, adj_pvalue)
}
do.call("rbind", temp_list)

